* load PISA test scores 2000-2018 from:
* 	pisadataexplorer.oecd.org

cd "~/Documents/econ/research/current/cognitive/estimation/data"

import excel using pisa_2000_2018.xls, firstrow clear ///
cellrange(b12:d621) sheet("Report 1- Table")
rename *, lower
rename yearstudy year
rename jurisdiction country
rename average average1
destring year average1, force replace
replace year = year[_n-1] if year==.
save pisa, replace

forvalues r=2/3 {
	import excel using pisa_2000_2018.xls, firstrow clear ///
	cellrange(b12:d621) sheet("Report `r'- Table")
	rename *, lower
	rename yearstudy year
	rename jurisdiction country
	rename average average`r'
	destring year average`r', force replace
	replace year = year[_n-1] if year==.
	merge 1:1 country year using pisa
	drop _merge
	save pisa, replace
}
rename (average1 average2 average3) (pisaread pisamath pisasci)
drop if regexm(country, "\(2015\)")	// for 4 countries, 
		// 2015 is not comparable, but available for other years

kountry country, from(other) stuck
rename _ISO3N_ iso3n
kountry iso3n, from(iso3n) to(iso3c)
rename _ISO3C_ countrycode
drop iso3n
drop if countrycode=="" // a few subnational regions
order countrycode year country pisa*
sort countrycode year

keep if year==2018
drop year
drop if pisasci==.
sort countrycode
save pisa2018, replace

/*
// using mi impute didn't work - just used 2015

encode countrycode, gen(isonum)	

reshape wide pisa*, i(countrycode isonum country) j(year)
foreach y in 2005 2010 {
	gen pisaread`y' = .
	gen pisamath`y' = .
	gen pisasci`y' = .
}
order countrycode isonum country pisa*, sequential

mi set wide
mi register imputed pisa*
mi impute chained (regress) pisa*, add(10) rseed(120222)
mi reshape long pisa*, i(countrycode isonum country) j(year)
mi estimate: xtreg pisaread pisamath pisasci, fe i(isonum)
saveold pisa, replace
